#######################################################
###This Code Reproduces All Plots. 
###1) it produces the diff-in-diff figure in the paper.
###2) it produces the equivalent plot in the Appendix.
###3) it produces the density plots in the Appendix.
#######################################################
#D. de Kadt & H. Larreguy, 2017

## Point R at the replication folder (fill in the path before running):
setwd("")
library(foreign)

## Load the ward-level panel from the Stata file in the working directory:
d <- read.dta("ward_level_data20002014.dta")

## From here on the working directory is the "results" sub-folder, so every
## figure written below lands there:
setwd(paste0(getwd(), "/results"))

## Keep every election except 2014:
d <- subset(d, d$year != 2014)

##Set up storage: one row per election year kept in the sample.
## NOTE: `c` shadows base::c() as a variable only; calls such as c(1, 2) still
## resolve to the function. The name is kept because the plotting code reads
## its columns from `c`.
c <- data.frame(year = c(2000, 2004, 2006, 2009, 2011))

##Year-by-year mean of anc_vs_na with a normal-approximation 95% CI.
##  data:     data frame with columns anc_vs_na and year.
##  in_group: logical vector, one element per row of `data`, flagging the
##            rows that belong to the group (NA counts as "not in group").
##  years:    years to summarise.
## Returns a data frame with one row per element of `years` and the columns
## mean, hi and lo (mean +/- 1.96 * sd / sqrt(n)).
## n is the number of non-missing outcomes, i.e. exactly the observations
## that enter mean() and sd(); counting rows with a missing outcome as well
## would make the interval too narrow.
group_year_ci <- function(data, in_group, years) {
  per_year <- vapply(years, function(yr) {
    y <- data$anc_vs_na[which(in_group & data$year == yr)]
    y <- y[!is.na(y)]
    centre <- mean(y)
    half_width <- 1.96 * sd(y) / sqrt(length(y))
    c(mean = centre, hi = centre + half_width, lo = centre - half_width)
  }, numeric(3))
  as.data.frame(t(per_year))
}

##Take conditional means and CIs for wards with dummy_1km == 1.
## kz:   dummy_KwaZulu == 1
## bant: dummy_all_tbvc == 1 and dummy_KwaZulu == 0
## all:  dummy_all_tbvc == 0
within_1km <- d$dummy_1km == 1
groups <- list(
  kz   = within_1km & d$dummy_KwaZulu == 1,
  bant = within_1km & d$dummy_KwaZulu == 0 & d$dummy_all_tbvc == 1,
  all  = within_1km & d$dummy_all_tbvc == 0
)

## Fills anc_vs_na_<group>, anc_vs_na_<group>_hi and anc_vs_na_<group>_lo.
for (g in names(groups)) {
  est <- group_year_ci(d, groups[[g]], c$year)
  c[[paste0("anc_vs_na_", g)]] <- est$mean
  c[[paste0("anc_vs_na_", g, "_hi")]] <- est$hi
  c[[paste0("anc_vs_na_", g, "_lo")]] <- est$lo
}

##Plot: diff-in-diff figure, written to diff_in_diff.pdf.
## Each series is drawn as a line, then point markers, then 95% CI whiskers
## taken from the *_lo / *_hi columns of `c`.
pdf(file = "diff_in_diff.pdf", width = 9, height = 6)
plot(c$year, c$anc_vs_na_kz, ylim = c(0, 1), type = "l", lwd = 2,
     axes = FALSE, ylab = "ANC Vote Share", xlab = "Election Year")
axis(1, at = c(2000, 2004, 2006, 2009, 2011))
axis(2)
## Semi-transparent grey band over 2007-2011 (labelled by text() below).
rect(2007, -1, 2011, 1, border = NA, col = "#84848477")

## `ylim` is fixed by plot() above; points()/lines() only add to the existing
## axes, so it is not passed again. arrows() is vectorised, so one call per
## series draws all five whiskers.
##KwaZulu (solid line):
points(c$year, c$anc_vs_na_kz, pch = 16)
arrows(c$year, c$anc_vs_na_kz_lo, c$year, c$anc_vs_na_kz_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

##Bantustans excl. KwaZulu (dashed line):
lines(c$year, c$anc_vs_na_bant, lty = 2, lwd = 2)
points(c$year, c$anc_vs_na_bant, pch = 18)
arrows(c$year, c$anc_vs_na_bant_lo, c$year, c$anc_vs_na_bant_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

##Rest of South Africa (dot-dash line):
lines(c$year, c$anc_vs_na_all, lty = 4, lwd = 2)
points(c$year, c$anc_vs_na_all, pch = 19)
arrows(c$year, c$anc_vs_na_all_lo, c$year, c$anc_vs_na_all_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

text(2007, 0.2, "Ethnic shift\nin ANC elite", pos = 4, col = "white")
legend(2000, 0.2, c("KwaZulu", "Bantustans excl. KZ", "Rest of South Africa"),
       lty = c(1, 2, 4), lwd = c(2, 2, 2), box.col = "white")
dev.off()

###Appendix Plot:
##Take conditional means and CIs for Whole Country (no dummy_1km filter).
## The columns of `c` computed for the main figure are overwritten.

##Year-by-year mean of anc_vs_na with a normal-approximation 95% CI.
##  data:     data frame with columns anc_vs_na and year.
##  in_group: logical vector, one element per row of `data`, flagging the
##            rows that belong to the group (NA counts as "not in group").
##  years:    years to summarise.
## Returns a data frame with one row per element of `years` and the columns
## mean, hi and lo (mean +/- 1.96 * sd / sqrt(n)).
## n is the number of non-missing outcomes of the very same cell that enters
## mean() and sd(), so hi and lo are always symmetric around the mean.
## Defined in this section so the appendix figure can be rebuilt on its own.
group_year_ci <- function(data, in_group, years) {
  per_year <- vapply(years, function(yr) {
    y <- data$anc_vs_na[which(in_group & data$year == yr)]
    y <- y[!is.na(y)]
    centre <- mean(y)
    half_width <- 1.96 * sd(y) / sqrt(length(y))
    c(mean = centre, hi = centre + half_width, lo = centre - half_width)
  }, numeric(3))
  as.data.frame(t(per_year))
}

## kz:   dummy_KwaZulu == 1
## bant: dummy_all_tbvc == 1 and dummy_KwaZulu == 0
## all:  dummy_all_tbvc == 0
appendix_groups <- list(
  kz   = d$dummy_KwaZulu == 1,
  bant = d$dummy_KwaZulu == 0 & d$dummy_all_tbvc == 1,
  all  = d$dummy_all_tbvc == 0
)

## Fills anc_vs_na_<group>, anc_vs_na_<group>_hi and anc_vs_na_<group>_lo.
for (g in names(appendix_groups)) {
  est <- group_year_ci(d, appendix_groups[[g]], c$year)
  c[[paste0("anc_vs_na_", g)]] <- est$mean
  c[[paste0("anc_vs_na_", g, "_hi")]] <- est$hi
  c[[paste0("anc_vs_na_", g, "_lo")]] <- est$lo
}

##Appendix Plot: same layout as the main figure, written to
## appendix_diff_in_diff.pdf from the values currently stored in `c`.
pdf(file = "appendix_diff_in_diff.pdf", width = 9, height = 6)
plot(c$year, c$anc_vs_na_kz, ylim = c(0, 1), type = "l", lwd = 2,
     axes = FALSE, ylab = "ANC Vote Share", xlab = "Election Year")
axis(1, at = c(2000, 2004, 2006, 2009, 2011))
axis(2)
## Semi-transparent grey band over 2007-2011 (labelled by text() below).
rect(2007, -1, 2011, 1, border = NA, col = "#84848477")

## `ylim` is fixed by plot() above; points()/lines() only add to the existing
## axes, so it is not passed again. arrows() is vectorised, so one call per
## series draws all five whiskers.
##KwaZulu (solid line):
points(c$year, c$anc_vs_na_kz, pch = 16)
arrows(c$year, c$anc_vs_na_kz_lo, c$year, c$anc_vs_na_kz_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

##Bantustans excl. KwaZulu (dashed line):
lines(c$year, c$anc_vs_na_bant, lty = 2, lwd = 2)
points(c$year, c$anc_vs_na_bant, pch = 18)
arrows(c$year, c$anc_vs_na_bant_lo, c$year, c$anc_vs_na_bant_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

##Rest of South Africa (dot-dash line):
lines(c$year, c$anc_vs_na_all, lty = 4, lwd = 2)
points(c$year, c$anc_vs_na_all, pch = 19)
arrows(c$year, c$anc_vs_na_all_lo, c$year, c$anc_vs_na_all_hi,
       angle = 90, length = .05, lwd = 2, code = 3)

text(2007, 0.2, "Ethnic shift\nin ANC elite", pos = 4, col = "white")
legend(2000, 0.2, c("KwaZulu", "Bantustans excl. KZ", "Rest of South Africa"),
       lty = c(1, 2, 4), lwd = c(2, 2, 2), box.col = "white")
dev.off()

####Appendix Density Plots:
data <- d

##Write one two-group density figure to a PDF.
##  file:        output PDF file name.
##  values:      covariate vector, one element per row of `data`.
##  treated:     logical vector selecting the group drawn in blue.
##  control:     logical vector selecting the group drawn in red.
##  main:        plot title.
##  xlab:        x-axis label (defaults to the title).
##  legend_at:   c(x, y) position of the legend, or NULL for no legend.
##  legend_text: the two legend labels (blue group first).
## Missing values are dropped before estimating each density.
## Returns the value of dev.off().
balance_density_pdf <- function(file, values, treated, control, main,
                                xlab = main, legend_at = NULL,
                                legend_text = NULL) {
  pdf(file = file)
  plot(density(na.omit(values[which(treated)])), col = "blue", lwd = 2,
       main = main, xlab = xlab)
  lines(density(na.omit(values[which(control)])), col = "red", lwd = 2)
  if (!is.null(legend_at)) {
    legend(legend_at[1], legend_at[2], col = c("blue", "red"), lwd = c(2, 2),
           legend = legend_text, bty = "n")
  }
  dev.off()
}

## Rows with dummy_1km == 1, split by dummy_all_tbvc:
bant_in  <- data$dummy_1km == 1 & data$dummy_all_tbvc == 1
bant_out <- data$dummy_1km == 1 & data$dummy_all_tbvc == 0
bant_labels <- c("1km Within Bantustan (treated)",
                 "1km Outside Bantustan (control)")

## Rows with dummy_KwaZulu_1km == 1, split by dummy_KwaZulu:
kz_in  <- data$dummy_KwaZulu_1km == 1 & data$dummy_KwaZulu == 1
kz_out <- data$dummy_KwaZulu_1km == 1 & data$dummy_KwaZulu == 0
kz_labels <- c("1km Within KwaZulu (treated)",
               "1km Outside KwaZulu (control)")

##Economics:
balance_density_pdf("appendix_balance_density_income_all.pdf",
                    data$income, bant_in, bant_out, main = "Income",
                    legend_at = c(1000, 0.001), legend_text = bant_labels)

balance_density_pdf("appendix_balance_density_unemploy_rate_all.pdf",
                    data$unemploy_rate, bant_in, bant_out,
                    main = "Unemployment Rate")

balance_density_pdf("appendix_balance_density_sector_rate_all.pdf",
                    data$sector, bant_in, bant_out,
                    main = "Sector Formality")

balance_density_pdf("appendix_balance_density_school_all.pdf",
                    data$school_complete, bant_in, bant_out,
                    main = "School Completion")


##Demographics:
balance_density_pdf("appendix_balance_density_white_all.pdf",
                    data$white_frac, bant_in, bant_out,
                    main = "White Fraction",
                    legend_at = c(0.1, 400), legend_text = bant_labels)

balance_density_pdf("appendix_balance_density_pop_all.pdf",
                    data$ln_pop, bant_in, bant_out,
                    main = "Population (logged)")

balance_density_pdf("appendix_balance_density_popdens_all.pdf",
                    data$ln_pop_density, bant_in, bant_out,
                    main = "Population Density (logged)")

balance_density_pdf("appendix_balance_density_gender_all.pdf",
                    data$gender, bant_in, bant_out,
                    main = "Sex", xlab = "Sex (Female=1)")

##Zulu demographics:
balance_density_pdf("appendix_balance_density_zulufrac.pdf",
                    data$isizulu_frac, kz_in, kz_out,
                    main = "Zulu Fraction",
                    legend_at = c(0, 15), legend_text = kz_labels)

